# This file creates estimates for parents for whom pretreatment co --------

# install.packages("dplyr")
# install.packages("date")
# install.packages("AER")
# install.packages("rio")

library(dplyr)
library(date)
library(AER)
library(rio)

# Load data

load("fathers.rdata")

# load administrative 
udd <- import(paste("udda", 2007, ".dta", sep = ""))
ind <- import(paste("indh", 2007, ".dta", sep = ""))
bef <- import(paste("bef", 2007, ".dta", sep = ""))
ras <- import(paste("ras", 2007, ".dta", sep = ""))
lpr <- import(paste("lpradm",2007,".sas7bdat", sep = ""))

# Filter of if age of oldest is unkwown (select parents )
fathers <-
  fathers %>%
  filter(!is.na(age_oldest)) 

# select parents who had children between 2009 and 2013 
# one month after 2009 to one month before 2013

fathers13 <- 
  fathers %>%
  filter(! is.na(stemte_2009) & !is.na(stemt) & first13 > 0) %>%
  filter(age_oldest >= (14565 + 30) & age_oldest <= (16028 - 30)) %>%
  mutate(vote09 = stemte_2009 == "Stemte",
         vote = stemt == "Stemte",
         no_children = no_children_13)

fathers13 <-
  within(fathers13, {
    age        <- ntile(date, 25)
    twin_first <- twin_first > 0
  })

fathers13 <-
  fathers13 %>%
  group_by(age) %>%
  mutate(child_age = ntile(age_oldest, 2))

# take log of days in hospital
lpr <- 
  lpr %>% 
  filter(!is.na(V_SENGDAGE)) %>% 
  group_by(pnr) %>% 
  summarise(logbeddays = log(sum(V_SENGDAGE)))

# merge and recode
fathers13 <-
  left_join(fathers13, ras, by = "pnr") %>% 
  mutate(SOCSTIL_KODE = if_else(is.na(SOCSTIL_KODE), 500,SOCSTIL_KODE)) %>% 
  group_by(pnr) %>% 
  mutate(SOCSTIL_KODE_min = min(SOCSTIL_KODE)) %>% 
  filter(SOCSTIL_KODE_min ==SOCSTIL_KODE) %>% 
  sample_n(1) %>%
  mutate(full_time =SOCSTIL_KODE < 200) %>%
  left_join(., udd, by = "pnr") %>% 
  mutate(primary_school = hfaudd_hovedgruppe == 10, 
         high_school    = hfaudd_hovedgruppe == 20 | hfaudd_hovedgruppe == 25,
         vocational     = hfaudd_hovedgruppe == 35, 
         short_tertiary = hfaudd_hovedgruppe == 40, 
         bachelor       = hfaudd_hovedgruppe == 50 | hfaudd_hovedgruppe == 60,
         masters_phd    = hfaudd_hovedgruppe == 65 | hfaudd_hovedgruppe == 70) %>%
  left_join(., ind, by = "pnr") %>% 
  mutate(log_income = ifelse(perindkialt > 0, log(perindkialt), 0)) %>%
  left_join(., bef, by = "pnr") %>% 
  mutate(danish = IE_TYPE == 1)  %>%
  ungroup() %>%
  mutate(log_income_mis = is.na(log_income),
         log_income = ifelse(is.na(log_income), mean(log_income, na.rm = TRUE), log_income),
         primary_school = ifelse(is.na(primary_school) , FALSE, primary_school ),
         high_school    = ifelse(is.na(high_school   ) , FALSE, high_school    ),
         vocational     = ifelse(is.na(vocational    ) , FALSE, vocational     ),
         short_tertiary = ifelse(is.na(short_tertiary) , FALSE, short_tertiary ),
         bachelor       = ifelse(is.na(bachelor      ) , FALSE, bachelor       ),
         masters_phd    = ifelse(is.na(masters_phd   ) , FALSE, masters_phd    ),
         danish = ifelse(is.na(danish), FALSE, danish)) %>%
  left_join(., lpr, by = "pnr")  %>% 
  mutate(logbeddays = ifelse(!is.na(logbeddays), logbeddays, 0))

# run models 

mod0 <- lm(vote ~ twin_first, data = fathers13)
mod1 <-
  lm(vote ~ twin_first + as.factor(age)*as.factor(child_age),
     data = fathers13)

mod2 <-
  lm(vote ~ twin_first + vote09 + as.factor(age)*as.factor(child_age),
     data = fathers13)

mod3 <- lm(vote ~ twin_first + vote09 + as.factor(age) * as.factor(child_age) +
             primary_school + high_school + vocational + short_tertiary +
             bachelor +  masters_phd + log_income + full_time + danish + log_income_mis +
             logbeddays, 
           data = fathers13)

mod1_iv  <- ivreg(vote ~ no_children + as.factor(age) * as.factor(child_age) | 
                    twin_first + as.factor(age) * as.factor(child_age), data = fathers13)

mod2_iv  <- ivreg(vote ~ no_children + vote09 + as.factor(age) * as.factor(child_age) | 
                    twin_first + vote09 + as.factor(age) * as.factor(child_age), data = fathers13)

mod3_iv  <- ivreg(vote ~ no_children + vote09 + as.factor(age) * as.factor(child_age) +
                    primary_school + high_school + vocational + short_tertiary +
                    bachelor +  masters_phd + log_income + full_time + danish  + log_income_mis + logbeddays | 
                    twin_first + vote09 + as.factor(age) * as.factor(child_age) +
                    primary_school + high_school + vocational + short_tertiary +
                    bachelor +  masters_phd + log_income + full_time + danish + log_income_mis + logbeddays, data = fathers13)

table_dad_13 <- rbind(c(summary(mod0)$coefficients[1,], with(fathers13, table(twin_first))),
                      c(summary(mod1)$coefficients[2,], with(fathers13, table(twin_first))),
                      c(summary(mod2)$coefficients[2,], with(fathers13, table(twin_first))),
                      c(summary(mod3)$coefficients[2,], with(fathers13, table(twin_first))),
                      c(summary(mod1_iv)$coefficients[2,], with(fathers13, table(twin_first))),
                      c(summary(mod2_iv)$coefficients[2,], with(fathers13, table(twin_first))),
                      c(summary(mod3_iv)$coefficients[2,], with(fathers13, table(twin_first))))

# Find F diagnostics 
sum1_iv <- summary(mod1_iv, diagnostics = TRUE)
sum2_iv <- summary(mod2_iv, diagnostics = TRUE)
sum3_iv <- summary(mod3_iv, diagnostics = TRUE)

f_diag_fathers <- 
  rbind(sum1_iv$diagnostics[1,],
        sum2_iv$diagnostics[1,],
        sum3_iv$diagnostics[1,])

## Repeat for 2014

# select parents who had children between 2009 and 2014 
# one month after 2009 to one month before 2014

fathers14 <- 
  fathers %>%
  filter(! is.na(stemte_2009) & !is.na(ep_stemt) & first13 > 0) %>%
  filter(age_oldest >= (14565 + 30) & age_oldest <= (16215 - 30)) %>%
  mutate(vote09 = stemte_2009 == "Stemte",
         vote = ep_stemt,
         no_children = no_children_14)

fathers14 <-
  within(fathers14, {
    age        <- ntile(date, 25)
    twin_first <- twin_first > 0
  })

fathers14 <-
  fathers14 %>%
  group_by(age) %>%
  mutate(child_age = ntile(age_oldest, 2))

fathers14 <-
  left_join(fathers14, ras, by = "pnr") %>% 
  mutate(SOCSTIL_KODE = if_else(is.na(SOCSTIL_KODE), 500,SOCSTIL_KODE)) %>% 
  group_by(pnr) %>% 
  mutate(SOCSTIL_KODE_min = min(SOCSTIL_KODE)) %>% 
  filter(SOCSTIL_KODE_min ==SOCSTIL_KODE) %>% 
  sample_n(1) %>%
  mutate(full_time =SOCSTIL_KODE < 200) %>%
  left_join(., udd, by = "pnr") %>% 
  mutate(primary_school = hfaudd_hovedgruppe == 10, 
         high_school    = hfaudd_hovedgruppe == 20 | hfaudd_hovedgruppe == 25,
         vocational     = hfaudd_hovedgruppe == 35, 
         short_tertiary = hfaudd_hovedgruppe == 40, 
         bachelor       = hfaudd_hovedgruppe == 50 | hfaudd_hovedgruppe == 60,
         masters_phd    = hfaudd_hovedgruppe == 65 | hfaudd_hovedgruppe == 70) %>%
  left_join(., ind, by = "pnr") %>% 
  mutate(log_income = ifelse(perindkialt > 0, log(perindkialt), 0)) %>%
  left_join(., bef, by = "pnr") %>% 
  mutate(danish = IE_TYPE == 1)  %>%
  ungroup() %>%
  mutate(log_income_mis = is.na(log_income),
         log_income = ifelse(is.na(log_income), mean(log_income, na.rm = TRUE), log_income),
         primary_school = ifelse(is.na(primary_school) , FALSE, primary_school ),
         high_school    = ifelse(is.na(high_school   ) , FALSE, high_school    ),
         vocational     = ifelse(is.na(vocational    ) , FALSE, vocational     ),
         short_tertiary = ifelse(is.na(short_tertiary) , FALSE, short_tertiary ),
         bachelor       = ifelse(is.na(bachelor      ) , FALSE, bachelor       ),
         masters_phd    = ifelse(is.na(masters_phd   ) , FALSE, masters_phd    ),
         danish = ifelse(is.na(danish), FALSE, danish)) %>%
  left_join(., lpr, by = "pnr")  %>% 
  mutate(logbeddays = ifelse(!is.na(logbeddays), logbeddays, 0))


mod0 <- lm(vote ~ twin_first, data = fathers14)
mod1 <-
  lm(vote ~ twin_first + as.factor(age)*as.factor(child_age),
     data = fathers14)

mod2 <-
  lm(vote ~ twin_first + vote09 + as.factor(age)*as.factor(child_age),
     data = fathers14)

mod3 <- lm(vote ~ twin_first + vote09 + as.factor(age) * as.factor(child_age) +
             primary_school + high_school + vocational + short_tertiary +
             bachelor +  masters_phd + log_income + full_time + danish + log_income_mis +
             logbeddays, 
           data = fathers14)

mod1_iv  <- ivreg(vote ~ no_children + as.factor(age) * as.factor(child_age) | 
                    twin_first + as.factor(age) * as.factor(child_age), data = fathers14)

mod2_iv  <- ivreg(vote ~ no_children + vote09 + as.factor(age) * as.factor(child_age) | 
                    twin_first + vote09 + as.factor(age) * as.factor(child_age), data = fathers14)

mod3_iv  <- ivreg(vote ~ no_children + vote09 + as.factor(age) * as.factor(child_age) +
                    primary_school + high_school + vocational + short_tertiary +
                    bachelor +  masters_phd + log_income + full_time + danish  + log_income_mis + logbeddays | 
                    twin_first + vote09 + as.factor(age) * as.factor(child_age) +
                    primary_school + high_school + vocational + short_tertiary +
                    bachelor +  masters_phd + log_income + full_time + danish + log_income_mis + logbeddays, data = fathers14)

table_dad_14 <- rbind(c(summary(mod0)$coefficients[1,], with(fathers14, table(twin_first))),
                      c(summary(mod1)$coefficients[2,], with(fathers14, table(twin_first))),
                      c(summary(mod2)$coefficients[2,], with(fathers14, table(twin_first))),
                      c(summary(mod3)$coefficients[2,], with(fathers14, table(twin_first))),
                      c(summary(mod1_iv)$coefficients[2,], with(fathers14, table(twin_first))),
                      c(summary(mod2_iv)$coefficients[2,], with(fathers14, table(twin_first))),
                      c(summary(mod3_iv)$coefficients[2,], with(fathers14, table(twin_first))))

table_dad_out <- 
  rbind(table_dad_13, table_dad_14)

# Find F diagnostics 
sum1_iv <- summary(mod1_iv, diagnostics = TRUE)
sum2_iv <- summary(mod2_iv, diagnostics = TRUE)
sum3_iv <- summary(mod3_iv, diagnostics = TRUE)

f_diag_fathers <- 
  rbind(f_diag_fathers,
        sum1_iv$diagnostics[1,],
        sum2_iv$diagnostics[1,],
        sum3_iv$diagnostics[1,])

write.table(table_dad_out, file = "father_post09.txt")

write.table(f_diag_fathers, file = "f_stat_fathers_post09.txt")

